/* cast5.c  -  CAST5 cipher (RFC2144)
 *	Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser general Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 */

/* Test vectors:
 *
 * 128-bit key	       = 01 23 45 67 12 34 56 78 23 45 67 89 34 56 78 9A
 *	   plaintext   = 01 23 45 67 89 AB CD EF
 *	   ciphertext  = 23 8B 4F E5 84 7E 44 B2
 *
 * 80-bit  key	       = 01 23 45 67 12 34 56 78 23 45
 *		       = 01 23 45 67 12 34 56 78 23 45 00 00 00 00 00 00
 *	   plaintext   = 01 23 45 67 89 AB CD EF
 *	   ciphertext  = EB 6A 71 1A 2C 02 27 1B
 *
 * 40-bit  key	       = 01 23 45 67 12
 *		       = 01 23 45 67 12 00 00 00 00 00 00 00 00 00 00 00
 *	   plaintext   = 01 23 45 67 89 AB CD EF
 *	   ciphertext  = 7A C8 16 D1 6E 9B 30 2E
 */

#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "g10lib.h"
#include "types.h"
#include "cipher.h"
#include "bithelp.h"
#include "bufhelp.h"
#include "cipher-internal.h"
#include "cipher-selftest.h"

/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
#undef USE_AMD64_ASM
#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AMD64_ASM 1
#endif

/* USE_ARM_ASM indicates whether to use ARM assembly code. */
#undef USE_ARM_ASM
#if defined(__ARMEL__)
# ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
#  define USE_ARM_ASM 1
# endif
#endif

#define CAST5_BLOCKSIZE 8

typedef struct {
    u32  Km[16];
    byte Kr[16];
#ifdef USE_ARM_ASM
    u32 Kr_arm_enc[16 / sizeof(u32)];
    u32 Kr_arm_dec[16 / sizeof(u32)];
#endif
} CAST5_context;

static gcry_err_code_t cast_setkey (void *c, const byte *key, unsigned keylen,
                                    cipher_bulk_ops_t *bulk_ops);
static unsigned int encrypt_block (void *c, byte *outbuf, const byte *inbuf);
static unsigned int decrypt_block (void *c, byte *outbuf, const byte *inbuf);



#define s1 _gcry_cast5_s1to4[0]
#define s2 _gcry_cast5_s1to4[1]
#define s3 _gcry_cast5_s1to4[2]
#define s4 _gcry_cast5_s1to4[3]

const u32 _gcry_cast5_s1to4[4][256] = { {
0x30fb40d4, 0x9fa0ff0b, 0x6beccd2f, 0x3f258c7a, 0x1e213f2f, 0x9c004dd3, 0x6003e540, 0xcf9fc949,
0xbfd4af27, 0x88bbbdb5, 0xe2034090, 0x98d09675, 0x6e63a0e0, 0x15c361d2, 0xc2e7661d, 0x22d4ff8e,
0x28683b6f, 0xc07fd059, 0xff2379c8, 0x775f50e2, 0x43c340d3, 0xdf2f8656, 0x887ca41a, 0xa2d2bd2d,
0xa1c9e0d6, 0x346c4819, 0x61b76d87, 0x22540f2f, 0x2abe32e1, 0xaa54166b, 0x22568e3a, 0xa2d341d0,
0x66db40c8, 0xa784392f, 0x004dff2f, 0x2db9d2de, 0x97943fac, 0x4a97c1d8, 0x527644b7, 0xb5f437a7,
0xb82cbaef, 0xd751d159, 0x6ff7f0ed, 0x5a097a1f, 0x827b68d0, 0x90ecf52e, 0x22b0c054, 0xbc8e5935,
0x4b6d2f7f, 0x50bb64a2, 0xd2664910, 0xbee5812d, 0xb7332290, 0xe93b159f, 0xb48ee411, 0x4bff345d,
0xfd45c240, 0xad31973f, 0xc4f6d02e, 0x55fc8165, 0xd5b1caad, 0xa1ac2dae, 0xa2d4b76d, 0xc19b0c50,
0x882240f2, 0x0c6e4f38, 0xa4e4bfd7, 0x4f5ba272, 0x564c1d2f, 0xc59c5319, 0xb949e354, 0xb04669fe,
0xb1b6ab8a, 0xc71358dd, 0x6385c545, 0x110f935d, 0x57538ad5, 0x6a390493, 0xe63d37e0, 0x2a54f6b3,
0x3a787d5f, 0x6276a0b5, 0x19a6fcdf, 0x7a42206a, 0x29f9d4d5, 0xf61b1891, 0xbb72275e, 0xaa508167,
0x38901091, 0xc6b505eb, 0x84c7cb8c, 0x2ad75a0f, 0x874a1427, 0xa2d1936b, 0x2ad286af, 0xaa56d291,
0xd7894360, 0x425c750d, 0x93b39e26, 0x187184c9, 0x6c00b32d, 0x73e2bb14, 0xa0bebc3c, 0x54623779,
0x64459eab, 0x3f328b82, 0x7718cf82, 0x59a2cea6, 0x04ee002e, 0x89fe78e6, 0x3fab0950, 0x325ff6c2,
0x81383f05, 0x6963c5c8, 0x76cb5ad6, 0xd49974c9, 0xca180dcf, 0x380782d5, 0xc7fa5cf6, 0x8ac31511,
0x35e79e13, 0x47da91d0, 0xf40f9086, 0xa7e2419e, 0x31366241, 0x051ef495, 0xaa573b04, 0x4a805d8d,
0x548300d0, 0x00322a3c, 0xbf64cddf, 0xba57a68e, 0x75c6372b, 0x50afd341, 0xa7c13275, 0x915a0bf5,
0x6b54bfab, 0x2b0b1426, 0xab4cc9d7, 0x449ccd82, 0xf7fbf265, 0xab85c5f3, 0x1b55db94, 0xaad4e324,
0xcfa4bd3f, 0x2deaa3e2, 0x9e204d02, 0xc8bd25ac, 0xeadf55b3, 0xd5bd9e98, 0xe31231b2, 0x2ad5ad6c,
0x954329de, 0xadbe4528, 0xd8710f69, 0xaa51c90f, 0xaa786bf6, 0x22513f1e, 0xaa51a79b, 0x2ad344cc,
0x7b5a41f0, 0xd37cfbad, 0x1b069505, 0x41ece491, 0xb4c332e6, 0x032268d4, 0xc9600acc, 0xce387e6d,
0xbf6bb16c, 0x6a70fb78, 0x0d03d9c9, 0xd4df39de, 0xe01063da, 0x4736f464, 0x5ad328d8, 0xb347cc96,
0x75bb0fc3, 0x98511bfb, 0x4ffbcc35, 0xb58bcf6a, 0xe11f0abc, 0xbfc5fe4a, 0xa70aec10, 0xac39570a,
0x3f04442f, 0x6188b153, 0xe0397a2e, 0x5727cb79, 0x9ceb418f, 0x1cacd68d, 0x2ad37c96, 0x0175cb9d,
0xc69dff09, 0xc75b65f0, 0xd9db40d8, 0xec0e7779, 0x4744ead4, 0xb11c3274, 0xdd24cb9e, 0x7e1c54bd,
0xf01144f9, 0xd2240eb1, 0x9675b3fd, 0xa3ac3755, 0xd47c27af, 0x51c85f4d, 0x56907596, 0xa5bb15e6,
0x580304f0, 0xca042cf1, 0x011a37ea, 0x8dbfaadb, 0x35ba3e4a, 0x3526ffa0, 0xc37b4d09, 0xbc306ed9,
0x98a52666, 0x5648f725, 0xff5e569d, 0x0ced63d0, 0x7c63b2cf, 0x700b45e1, 0xd5ea50f1, 0x85a92872,
0xaf1fbda7, 0xd4234870, 0xa7870bf3, 0x2d3b4d79, 0x42e04198, 0x0cd0ede7, 0x26470db8, 0xf881814c,
0x474d6ad7, 0x7c0c5e5c, 0xd1231959, 0x381b7298, 0xf5d2f4db, 0xab838653, 0x6e2f1e23, 0x83719c9e,
0xbd91e046, 0x9a56456e, 0xdc39200c, 0x20c8c571, 0x962bda1c, 0xe1e696ff, 0xb141ab08, 0x7cca89b9,
0x1a69e783, 0x02cc4843, 0xa2f7c579, 0x429ef47d, 0x427b169c, 0x5ac9f049, 0xdd8f0f00, 0x5c8165bf
}, {
0x1f201094, 0xef0ba75b, 0x69e3cf7e, 0x393f4380, 0xfe61cf7a, 0xeec5207a, 0x55889c94, 0x72fc0651,
0xada7ef79, 0x4e1d7235, 0xd55a63ce, 0xde0436ba, 0x99c430ef, 0x5f0c0794, 0x18dcdb7d, 0xa1d6eff3,
0xa0b52f7b, 0x59e83605, 0xee15b094, 0xe9ffd909, 0xdc440086, 0xef944459, 0xba83ccb3, 0xe0c3cdfb,
0xd1da4181, 0x3b092ab1, 0xf997f1c1, 0xa5e6cf7b, 0x01420ddb, 0xe4e7ef5b, 0x25a1ff41, 0xe180f806,
0x1fc41080, 0x179bee7a, 0xd37ac6a9, 0xfe5830a4, 0x98de8b7f, 0x77e83f4e, 0x79929269, 0x24fa9f7b,
0xe113c85b, 0xacc40083, 0xd7503525, 0xf7ea615f, 0x62143154, 0x0d554b63, 0x5d681121, 0xc866c359,
0x3d63cf73, 0xcee234c0, 0xd4d87e87, 0x5c672b21, 0x071f6181, 0x39f7627f, 0x361e3084, 0xe4eb573b,
0x602f64a4, 0xd63acd9c, 0x1bbc4635, 0x9e81032d, 0x2701f50c, 0x99847ab4, 0xa0e3df79, 0xba6cf38c,
0x10843094, 0x2537a95e, 0xf46f6ffe, 0xa1ff3b1f, 0x208cfb6a, 0x8f458c74, 0xd9e0a227, 0x4ec73a34,
0xfc884f69, 0x3e4de8df, 0xef0e0088, 0x3559648d, 0x8a45388c, 0x1d804366, 0x721d9bfd, 0xa58684bb,
0xe8256333, 0x844e8212, 0x128d8098, 0xfed33fb4, 0xce280ae1, 0x27e19ba5, 0xd5a6c252, 0xe49754bd,
0xc5d655dd, 0xeb667064, 0x77840b4d, 0xa1b6a801, 0x84db26a9, 0xe0b56714, 0x21f043b7, 0xe5d05860,
0x54f03084, 0x066ff472, 0xa31aa153, 0xdadc4755, 0xb5625dbf, 0x68561be6, 0x83ca6b94, 0x2d6ed23b,
0xeccf01db, 0xa6d3d0ba, 0xb6803d5c, 0xaf77a709, 0x33b4a34c, 0x397bc8d6, 0x5ee22b95, 0x5f0e5304,
0x81ed6f61, 0x20e74364, 0xb45e1378, 0xde18639b, 0x881ca122, 0xb96726d1, 0x8049a7e8, 0x22b7da7b,
0x5e552d25, 0x5272d237, 0x79d2951c, 0xc60d894c, 0x488cb402, 0x1ba4fe5b, 0xa4b09f6b, 0x1ca815cf,
0xa20c3005, 0x8871df63, 0xb9de2fcb, 0x0cc6c9e9, 0x0beeff53, 0xe3214517, 0xb4542835, 0x9f63293c,
0xee41e729, 0x6e1d2d7c, 0x50045286, 0x1e6685f3, 0xf33401c6, 0x30a22c95, 0x31a70850, 0x60930f13,
0x73f98417, 0xa1269859, 0xec645c44, 0x52c877a9, 0xcdff33a6, 0xa02b1741, 0x7cbad9a2, 0x2180036f,
0x50d99c08, 0xcb3f4861, 0xc26bd765, 0x64a3f6ab, 0x80342676, 0x25a75e7b, 0xe4e6d1fc, 0x20c710e6,
0xcdf0b680, 0x17844d3b, 0x31eef84d, 0x7e0824e4, 0x2ccb49eb, 0x846a3bae, 0x8ff77888, 0xee5d60f6,
0x7af75673, 0x2fdd5cdb, 0xa11631c1, 0x30f66f43, 0xb3faec54, 0x157fd7fa, 0xef8579cc, 0xd152de58,
0xdb2ffd5e, 0x8f32ce19, 0x306af97a, 0x02f03ef8, 0x99319ad5, 0xc242fa0f, 0xa7e3ebb0, 0xc68e4906,
0xb8da230c, 0x80823028, 0xdcdef3c8, 0xd35fb171, 0x088a1bc8, 0xbec0c560, 0x61a3c9e8, 0xbca8f54d,
0xc72feffa, 0x22822e99, 0x82c570b4, 0xd8d94e89, 0x8b1c34bc, 0x301e16e6, 0x273be979, 0xb0ffeaa6,
0x61d9b8c6, 0x00b24869, 0xb7ffce3f, 0x08dc283b, 0x43daf65a, 0xf7e19798, 0x7619b72f, 0x8f1c9ba4,
0xdc8637a0, 0x16a7d3b1, 0x9fc393b7, 0xa7136eeb, 0xc6bcc63e, 0x1a513742, 0xef6828bc, 0x520365d6,
0x2d6a77ab, 0x3527ed4b, 0x821fd216, 0x095c6e2e, 0xdb92f2fb, 0x5eea29cb, 0x145892f5, 0x91584f7f,
0x5483697b, 0x2667a8cc, 0x85196048, 0x8c4bacea, 0x833860d4, 0x0d23e0f9, 0x6c387e8a, 0x0ae6d249,
0xb284600c, 0xd835731d, 0xdcb1c647, 0xac4c56ea, 0x3ebd81b3, 0x230eabb0, 0x6438bc87, 0xf0b5b1fa,
0x8f5ea2b3, 0xfc184642, 0x0a036b7a, 0x4fb089bd, 0x649da589, 0xa345415e, 0x5c038323, 0x3e5d3bb9,
0x43d79572, 0x7e6dd07c, 0x06dfdf1e, 0x6c6cc4ef, 0x7160a539, 0x73bfbe70, 0x83877605, 0x4523ecf1
}, {
0x8defc240, 0x25fa5d9f, 0xeb903dbf, 0xe810c907, 0x47607fff, 0x369fe44b, 0x8c1fc644, 0xaececa90,
0xbeb1f9bf, 0xeefbcaea, 0xe8cf1950, 0x51df07ae, 0x920e8806, 0xf0ad0548, 0xe13c8d83, 0x927010d5,
0x11107d9f, 0x07647db9, 0xb2e3e4d4, 0x3d4f285e, 0xb9afa820, 0xfade82e0, 0xa067268b, 0x8272792e,
0x553fb2c0, 0x489ae22b, 0xd4ef9794, 0x125e3fbc, 0x21fffcee, 0x825b1bfd, 0x9255c5ed, 0x1257a240,
0x4e1a8302, 0xbae07fff, 0x528246e7, 0x8e57140e, 0x3373f7bf, 0x8c9f8188, 0xa6fc4ee8, 0xc982b5a5,
0xa8c01db7, 0x579fc264, 0x67094f31, 0xf2bd3f5f, 0x40fff7c1, 0x1fb78dfc, 0x8e6bd2c1, 0x437be59b,
0x99b03dbf, 0xb5dbc64b, 0x638dc0e6, 0x55819d99, 0xa197c81c, 0x4a012d6e, 0xc5884a28, 0xccc36f71,
0xb843c213, 0x6c0743f1, 0x8309893c, 0x0feddd5f, 0x2f7fe850, 0xd7c07f7e, 0x02507fbf, 0x5afb9a04,
0xa747d2d0, 0x1651192e, 0xaf70bf3e, 0x58c31380, 0x5f98302e, 0x727cc3c4, 0x0a0fb402, 0x0f7fef82,
0x8c96fdad, 0x5d2c2aae, 0x8ee99a49, 0x50da88b8, 0x8427f4a0, 0x1eac5790, 0x796fb449, 0x8252dc15,
0xefbd7d9b, 0xa672597d, 0xada840d8, 0x45f54504, 0xfa5d7403, 0xe83ec305, 0x4f91751a, 0x925669c2,
0x23efe941, 0xa903f12e, 0x60270df2, 0x0276e4b6, 0x94fd6574, 0x927985b2, 0x8276dbcb, 0x02778176,
0xf8af918d, 0x4e48f79e, 0x8f616ddf, 0xe29d840e, 0x842f7d83, 0x340ce5c8, 0x96bbb682, 0x93b4b148,
0xef303cab, 0x984faf28, 0x779faf9b, 0x92dc560d, 0x224d1e20, 0x8437aa88, 0x7d29dc96, 0x2756d3dc,
0x8b907cee, 0xb51fd240, 0xe7c07ce3, 0xe566b4a1, 0xc3e9615e, 0x3cf8209d, 0x6094d1e3, 0xcd9ca341,
0x5c76460e, 0x00ea983b, 0xd4d67881, 0xfd47572c, 0xf76cedd9, 0xbda8229c, 0x127dadaa, 0x438a074e,
0x1f97c090, 0x081bdb8a, 0x93a07ebe, 0xb938ca15, 0x97b03cff, 0x3dc2c0f8, 0x8d1ab2ec, 0x64380e51,
0x68cc7bfb, 0xd90f2788, 0x12490181, 0x5de5ffd4, 0xdd7ef86a, 0x76a2e214, 0xb9a40368, 0x925d958f,
0x4b39fffa, 0xba39aee9, 0xa4ffd30b, 0xfaf7933b, 0x6d498623, 0x193cbcfa, 0x27627545, 0x825cf47a,
0x61bd8ba0, 0xd11e42d1, 0xcead04f4, 0x127ea392, 0x10428db7, 0x8272a972, 0x9270c4a8, 0x127de50b,
0x285ba1c8, 0x3c62f44f, 0x35c0eaa5, 0xe805d231, 0x428929fb, 0xb4fcdf82, 0x4fb66a53, 0x0e7dc15b,
0x1f081fab, 0x108618ae, 0xfcfd086d, 0xf9ff2889, 0x694bcc11, 0x236a5cae, 0x12deca4d, 0x2c3f8cc5,
0xd2d02dfe, 0xf8ef5896, 0xe4cf52da, 0x95155b67, 0x494a488c, 0xb9b6a80c, 0x5c8f82bc, 0x89d36b45,
0x3a609437, 0xec00c9a9, 0x44715253, 0x0a874b49, 0xd773bc40, 0x7c34671c, 0x02717ef6, 0x4feb5536,
0xa2d02fff, 0xd2bf60c4, 0xd43f03c0, 0x50b4ef6d, 0x07478cd1, 0x006e1888, 0xa2e53f55, 0xb9e6d4bc,
0xa2048016, 0x97573833, 0xd7207d67, 0xde0f8f3d, 0x72f87b33, 0xabcc4f33, 0x7688c55d, 0x7b00a6b0,
0x947b0001, 0x570075d2, 0xf9bb88f8, 0x8942019e, 0x4264a5ff, 0x856302e0, 0x72dbd92b, 0xee971b69,
0x6ea22fde, 0x5f08ae2b, 0xaf7a616d, 0xe5c98767, 0xcf1febd2, 0x61efc8c2, 0xf1ac2571, 0xcc8239c2,
0x67214cb8, 0xb1e583d1, 0xb7dc3e62, 0x7f10bdce, 0xf90a5c38, 0x0ff0443d, 0x606e6dc6, 0x60543a49,
0x5727c148, 0x2be98a1d, 0x8ab41738, 0x20e1be24, 0xaf96da0f, 0x68458425, 0x99833be5, 0x600d457d,
0x282f9350, 0x8334b362, 0xd91d1120, 0x2b6d8da0, 0x642b1e31, 0x9c305a00, 0x52bce688, 0x1b03588a,
0xf7baefd5, 0x4142ed9c, 0xa4315c11, 0x83323ec5, 0xdfef4636, 0xa133c501, 0xe9d3531c, 0xee353783
}, {
0x9db30420, 0x1fb6e9de, 0xa7be7bef, 0xd273a298, 0x4a4f7bdb, 0x64ad8c57, 0x85510443, 0xfa020ed1,
0x7e287aff, 0xe60fb663, 0x095f35a1, 0x79ebf120, 0xfd059d43, 0x6497b7b1, 0xf3641f63, 0x241e4adf,
0x28147f5f, 0x4fa2b8cd, 0xc9430040, 0x0cc32220, 0xfdd30b30, 0xc0a5374f, 0x1d2d00d9, 0x24147b15,
0xee4d111a, 0x0fca5167, 0x71ff904c, 0x2d195ffe, 0x1a05645f, 0x0c13fefe, 0x081b08ca, 0x05170121,
0x80530100, 0xe83e5efe, 0xac9af4f8, 0x7fe72701, 0xd2b8ee5f, 0x06df4261, 0xbb9e9b8a, 0x7293ea25,
0xce84ffdf, 0xf5718801, 0x3dd64b04, 0xa26f263b, 0x7ed48400, 0x547eebe6, 0x446d4ca0, 0x6cf3d6f5,
0x2649abdf, 0xaea0c7f5, 0x36338cc1, 0x503f7e93, 0xd3772061, 0x11b638e1, 0x72500e03, 0xf80eb2bb,
0xabe0502e, 0xec8d77de, 0x57971e81, 0xe14f6746, 0xc9335400, 0x6920318f, 0x081dbb99, 0xffc304a5,
0x4d351805, 0x7f3d5ce3, 0xa6c866c6, 0x5d5bcca9, 0xdaec6fea, 0x9f926f91, 0x9f46222f, 0x3991467d,
0xa5bf6d8e, 0x1143c44f, 0x43958302, 0xd0214eeb, 0x022083b8, 0x3fb6180c, 0x18f8931e, 0x281658e6,
0x26486e3e, 0x8bd78a70, 0x7477e4c1, 0xb506e07c, 0xf32d0a25, 0x79098b02, 0xe4eabb81, 0x28123b23,
0x69dead38, 0x1574ca16, 0xdf871b62, 0x211c40b7, 0xa51a9ef9, 0x0014377b, 0x041e8ac8, 0x09114003,
0xbd59e4d2, 0xe3d156d5, 0x4fe876d5, 0x2f91a340, 0x557be8de, 0x00eae4a7, 0x0ce5c2ec, 0x4db4bba6,
0xe756bdff, 0xdd3369ac, 0xec17b035, 0x06572327, 0x99afc8b0, 0x56c8c391, 0x6b65811c, 0x5e146119,
0x6e85cb75, 0xbe07c002, 0xc2325577, 0x893ff4ec, 0x5bbfc92d, 0xd0ec3b25, 0xb7801ab7, 0x8d6d3b24,
0x20c763ef, 0xc366a5fc, 0x9c382880, 0x0ace3205, 0xaac9548a, 0xeca1d7c7, 0x041afa32, 0x1d16625a,
0x6701902c, 0x9b757a54, 0x31d477f7, 0x9126b031, 0x36cc6fdb, 0xc70b8b46, 0xd9e66a48, 0x56e55a79,
0x026a4ceb, 0x52437eff, 0x2f8f76b4, 0x0df980a5, 0x8674cde3, 0xedda04eb, 0x17a9be04, 0x2c18f4df,
0xb7747f9d, 0xab2af7b4, 0xefc34d20, 0x2e096b7c, 0x1741a254, 0xe5b6a035, 0x213d42f6, 0x2c1c7c26,
0x61c2f50f, 0x6552daf9, 0xd2c231f8, 0x25130f69, 0xd8167fa2, 0x0418f2c8, 0x001a96a6, 0x0d1526ab,
0x63315c21, 0x5e0a72ec, 0x49bafefd, 0x187908d9, 0x8d0dbd86, 0x311170a7, 0x3e9b640c, 0xcc3e10d7,
0xd5cad3b6, 0x0caec388, 0xf73001e1, 0x6c728aff, 0x71eae2a1, 0x1f9af36e, 0xcfcbd12f, 0xc1de8417,
0xac07be6b, 0xcb44a1d8, 0x8b9b0f56, 0x013988c3, 0xb1c52fca, 0xb4be31cd, 0xd8782806, 0x12a3a4e2,
0x6f7de532, 0x58fd7eb6, 0xd01ee900, 0x24adffc2, 0xf4990fc5, 0x9711aac5, 0x001d7b95, 0x82e5e7d2,
0x109873f6, 0x00613096, 0xc32d9521, 0xada121ff, 0x29908415, 0x7fbb977f, 0xaf9eb3db, 0x29c9ed2a,
0x5ce2a465, 0xa730f32c, 0xd0aa3fe8, 0x8a5cc091, 0xd49e2ce7, 0x0ce454a9, 0xd60acd86, 0x015f1919,
0x77079103, 0xdea03af6, 0x78a8565e, 0xdee356df, 0x21f05cbe, 0x8b75e387, 0xb3c50651, 0xb8a5c3ef,
0xd8eeb6d2, 0xe523be77, 0xc2154529, 0x2f69efdf, 0xafe67afb, 0xf470c4b2, 0xf3e0eb5b, 0xd6cc9876,
0x39e4460c, 0x1fda8538, 0x1987832f, 0xca007367, 0xa99144f8, 0x296b299e, 0x492fc295, 0x9266beab,
0xb5676e69, 0x9bd3ddda, 0xdf7e052f, 0xdb25701c, 0x1b5e51ee, 0xf65324e6, 0x6afce36c, 0x0316cc04,
0x8644213e, 0xb7dc59d0, 0x7965291f, 0xccd6fd43, 0x41823979, 0x932bcdf6, 0xb657c34d, 0x4edfd282,
0x7ae5290c, 0x3cb9536b, 0x851e20fe, 0x9833557e, 0x13ecf0b0, 0xd3ffb372, 0x3f85c5c1, 0x0aef7ed2
} };
static const u32 s5[256] = {
0x7ec90c04, 0x2c6e74b9, 0x9b0e66df, 0xa6337911, 0xb86a7fff, 0x1dd358f5, 0x44dd9d44, 0x1731167f,
0x08fbf1fa, 0xe7f511cc, 0xd2051b00, 0x735aba00, 0x2ab722d8, 0x386381cb, 0xacf6243a, 0x69befd7a,
0xe6a2e77f, 0xf0c720cd, 0xc4494816, 0xccf5c180, 0x38851640, 0x15b0a848, 0xe68b18cb, 0x4caadeff,
0x5f480a01, 0x0412b2aa, 0x259814fc, 0x41d0efe2, 0x4e40b48d, 0x248eb6fb, 0x8dba1cfe, 0x41a99b02,
0x1a550a04, 0xba8f65cb, 0x7251f4e7, 0x95a51725, 0xc106ecd7, 0x97a5980a, 0xc539b9aa, 0x4d79fe6a,
0xf2f3f763, 0x68af8040, 0xed0c9e56, 0x11b4958b, 0xe1eb5a88, 0x8709e6b0, 0xd7e07156, 0x4e29fea7,
0x6366e52d, 0x02d1c000, 0xc4ac8e05, 0x9377f571, 0x0c05372a, 0x578535f2, 0x2261be02, 0xd642a0c9,
0xdf13a280, 0x74b55bd2, 0x682199c0, 0xd421e5ec, 0x53fb3ce8, 0xc8adedb3, 0x28a87fc9, 0x3d959981,
0x5c1ff900, 0xfe38d399, 0x0c4eff0b, 0x062407ea, 0xaa2f4fb1, 0x4fb96976, 0x90c79505, 0xb0a8a774,
0xef55a1ff, 0xe59ca2c2, 0xa6b62d27, 0xe66a4263, 0xdf65001f, 0x0ec50966, 0xdfdd55bc, 0x29de0655,
0x911e739a, 0x17af8975, 0x32c7911c, 0x89f89468, 0x0d01e980, 0x524755f4, 0x03b63cc9, 0x0cc844b2,
0xbcf3f0aa, 0x87ac36e9, 0xe53a7426, 0x01b3d82b, 0x1a9e7449, 0x64ee2d7e, 0xcddbb1da, 0x01c94910,
0xb868bf80, 0x0d26f3fd, 0x9342ede7, 0x04a5c284, 0x636737b6, 0x50f5b616, 0xf24766e3, 0x8eca36c1,
0x136e05db, 0xfef18391, 0xfb887a37, 0xd6e7f7d4, 0xc7fb7dc9, 0x3063fcdf, 0xb6f589de, 0xec2941da,
0x26e46695, 0xb7566419, 0xf654efc5, 0xd08d58b7, 0x48925401, 0xc1bacb7f, 0xe5ff550f, 0xb6083049,
0x5bb5d0e8, 0x87d72e5a, 0xab6a6ee1, 0x223a66ce, 0xc62bf3cd, 0x9e0885f9, 0x68cb3e47, 0x086c010f,
0xa21de820, 0xd18b69de, 0xf3f65777, 0xfa02c3f6, 0x407edac3, 0xcbb3d550, 0x1793084d, 0xb0d70eba,
0x0ab378d5, 0xd951fb0c, 0xded7da56, 0x4124bbe4, 0x94ca0b56, 0x0f5755d1, 0xe0e1e56e, 0x6184b5be,
0x580a249f, 0x94f74bc0, 0xe327888e, 0x9f7b5561, 0xc3dc0280, 0x05687715, 0x646c6bd7, 0x44904db3,
0x66b4f0a3, 0xc0f1648a, 0x697ed5af, 0x49e92ff6, 0x309e374f, 0x2cb6356a, 0x85808573, 0x4991f840,
0x76f0ae02, 0x083be84d, 0x28421c9a, 0x44489406, 0x736e4cb8, 0xc1092910, 0x8bc95fc6, 0x7d869cf4,
0x134f616f, 0x2e77118d, 0xb31b2be1, 0xaa90b472, 0x3ca5d717, 0x7d161bba, 0x9cad9010, 0xaf462ba2,
0x9fe459d2, 0x45d34559, 0xd9f2da13, 0xdbc65487, 0xf3e4f94e, 0x176d486f, 0x097c13ea, 0x631da5c7,
0x445f7382, 0x175683f4, 0xcdc66a97, 0x70be0288, 0xb3cdcf72, 0x6e5dd2f3, 0x20936079, 0x459b80a5,
0xbe60e2db, 0xa9c23101, 0xeba5315c, 0x224e42f2, 0x1c5c1572, 0xf6721b2c, 0x1ad2fff3, 0x8c25404e,
0x324ed72f, 0x4067b7fd, 0x0523138e, 0x5ca3bc78, 0xdc0fd66e, 0x75922283, 0x784d6b17, 0x58ebb16e,
0x44094f85, 0x3f481d87, 0xfcfeae7b, 0x77b5ff76, 0x8c2302bf, 0xaaf47556, 0x5f46b02a, 0x2b092801,
0x3d38f5f7, 0x0ca81f36, 0x52af4a8a, 0x66d5e7c0, 0xdf3b0874, 0x95055110, 0x1b5ad7a8, 0xf61ed5ad,
0x6cf6e479, 0x20758184, 0xd0cefa65, 0x88f7be58, 0x4a046826, 0x0ff6f8f3, 0xa09c7f70, 0x5346aba0,
0x5ce96c28, 0xe176eda3, 0x6bac307f, 0x376829d2, 0x85360fa9, 0x17e3fe2a, 0x24b79767, 0xf5a96b20,
0xd6cd2595, 0x68ff1ebf, 0x7555442c, 0xf19f06be, 0xf9e0659a, 0xeeb9491d, 0x34010718, 0xbb30cab8,
0xe822fe15, 0x88570983, 0x750e6249, 0xda627e55, 0x5e76ffa8, 0xb1534546, 0x6d47de08, 0xefe9e7d4
};
static const u32 s6[256] = {
0xf6fa8f9d, 0x2cac6ce1, 0x4ca34867, 0xe2337f7c, 0x95db08e7, 0x016843b4, 0xeced5cbc, 0x325553ac,
0xbf9f0960, 0xdfa1e2ed, 0x83f0579d, 0x63ed86b9, 0x1ab6a6b8, 0xde5ebe39, 0xf38ff732, 0x8989b138,
0x33f14961, 0xc01937bd, 0xf506c6da, 0xe4625e7e, 0xa308ea99, 0x4e23e33c, 0x79cbd7cc, 0x48a14367,
0xa3149619, 0xfec94bd5, 0xa114174a, 0xeaa01866, 0xa084db2d, 0x09a8486f, 0xa888614a, 0x2900af98,
0x01665991, 0xe1992863, 0xc8f30c60, 0x2e78ef3c, 0xd0d51932, 0xcf0fec14, 0xf7ca07d2, 0xd0a82072,
0xfd41197e, 0x9305a6b0, 0xe86be3da, 0x74bed3cd, 0x372da53c, 0x4c7f4448, 0xdab5d440, 0x6dba0ec3,
0x083919a7, 0x9fbaeed9, 0x49dbcfb0, 0x4e670c53, 0x5c3d9c01, 0x64bdb941, 0x2c0e636a, 0xba7dd9cd,
0xea6f7388, 0xe70bc762, 0x35f29adb, 0x5c4cdd8d, 0xf0d48d8c, 0xb88153e2, 0x08a19866, 0x1ae2eac8,
0x284caf89, 0xaa928223, 0x9334be53, 0x3b3a21bf, 0x16434be3, 0x9aea3906, 0xefe8c36e, 0xf890cdd9,
0x80226dae, 0xc340a4a3, 0xdf7e9c09, 0xa694a807, 0x5b7c5ecc, 0x221db3a6, 0x9a69a02f, 0x68818a54,
0xceb2296f, 0x53c0843a, 0xfe893655, 0x25bfe68a, 0xb4628abc, 0xcf222ebf, 0x25ac6f48, 0xa9a99387,
0x53bddb65, 0xe76ffbe7, 0xe967fd78, 0x0ba93563, 0x8e342bc1, 0xe8a11be9, 0x4980740d, 0xc8087dfc,
0x8de4bf99, 0xa11101a0, 0x7fd37975, 0xda5a26c0, 0xe81f994f, 0x9528cd89, 0xfd339fed, 0xb87834bf,
0x5f04456d, 0x22258698, 0xc9c4c83b, 0x2dc156be, 0x4f628daa, 0x57f55ec5, 0xe2220abe, 0xd2916ebf,
0x4ec75b95, 0x24f2c3c0, 0x42d15d99, 0xcd0d7fa0, 0x7b6e27ff, 0xa8dc8af0, 0x7345c106, 0xf41e232f,
0x35162386, 0xe6ea8926, 0x3333b094, 0x157ec6f2, 0x372b74af, 0x692573e4, 0xe9a9d848, 0xf3160289,
0x3a62ef1d, 0xa787e238, 0xf3a5f676, 0x74364853, 0x20951063, 0x4576698d, 0xb6fad407, 0x592af950,
0x36f73523, 0x4cfb6e87, 0x7da4cec0, 0x6c152daa, 0xcb0396a8, 0xc50dfe5d, 0xfcd707ab, 0x0921c42f,
0x89dff0bb, 0x5fe2be78, 0x448f4f33, 0x754613c9, 0x2b05d08d, 0x48b9d585, 0xdc049441, 0xc8098f9b,
0x7dede786, 0xc39a3373, 0x42410005, 0x6a091751, 0x0ef3c8a6, 0x890072d6, 0x28207682, 0xa9a9f7be,
0xbf32679d, 0xd45b5b75, 0xb353fd00, 0xcbb0e358, 0x830f220a, 0x1f8fb214, 0xd372cf08, 0xcc3c4a13,
0x8cf63166, 0x061c87be, 0x88c98f88, 0x6062e397, 0x47cf8e7a, 0xb6c85283, 0x3cc2acfb, 0x3fc06976,
0x4e8f0252, 0x64d8314d, 0xda3870e3, 0x1e665459, 0xc10908f0, 0x513021a5, 0x6c5b68b7, 0x822f8aa0,
0x3007cd3e, 0x74719eef, 0xdc872681, 0x073340d4, 0x7e432fd9, 0x0c5ec241, 0x8809286c, 0xf592d891,
0x08a930f6, 0x957ef305, 0xb7fbffbd, 0xc266e96f, 0x6fe4ac98, 0xb173ecc0, 0xbc60b42a, 0x953498da,
0xfba1ae12, 0x2d4bd736, 0x0f25faab, 0xa4f3fceb, 0xe2969123, 0x257f0c3d, 0x9348af49, 0x361400bc,
0xe8816f4a, 0x3814f200, 0xa3f94043, 0x9c7a54c2, 0xbc704f57, 0xda41e7f9, 0xc25ad33a, 0x54f4a084,
0xb17f5505, 0x59357cbe, 0xedbd15c8, 0x7f97c5ab, 0xba5ac7b5, 0xb6f6deaf, 0x3a479c3a, 0x5302da25,
0x653d7e6a, 0x54268d49, 0x51a477ea, 0x5017d55b, 0xd7d25d88, 0x44136c76, 0x0404a8c8, 0xb8e5a121,
0xb81a928a, 0x60ed5869, 0x97c55b96, 0xeaec991b, 0x29935913, 0x01fdb7f1, 0x088e8dfa, 0x9ab6f6f5,
0x3b4cbf9f, 0x4a5de3ab, 0xe6051d35, 0xa0e1d855, 0xd36b4cf1, 0xf544edeb, 0xb0e93524, 0xbebb8fbd,
0xa2d762cf, 0x49c92f54, 0x38b5f331, 0x7128a454, 0x48392905, 0xa65b1db8, 0x851c97bd, 0xd675cf2f
};
static const u32 s7[256] = {
0x85e04019, 0x332bf567, 0x662dbfff, 0xcfc65693, 0x2a8d7f6f, 0xab9bc912, 0xde6008a1, 0x2028da1f,
0x0227bce7, 0x4d642916, 0x18fac300, 0x50f18b82, 0x2cb2cb11, 0xb232e75c, 0x4b3695f2, 0xb28707de,
0xa05fbcf6, 0xcd4181e9, 0xe150210c, 0xe24ef1bd, 0xb168c381, 0xfde4e789, 0x5c79b0d8, 0x1e8bfd43,
0x4d495001, 0x38be4341, 0x913cee1d, 0x92a79c3f, 0x089766be, 0xbaeeadf4, 0x1286becf, 0xb6eacb19,
0x2660c200, 0x7565bde4, 0x64241f7a, 0x8248dca9, 0xc3b3ad66, 0x28136086, 0x0bd8dfa8, 0x356d1cf2,
0x107789be, 0xb3b2e9ce, 0x0502aa8f, 0x0bc0351e, 0x166bf52a, 0xeb12ff82, 0xe3486911, 0xd34d7516,
0x4e7b3aff, 0x5f43671b, 0x9cf6e037, 0x4981ac83, 0x334266ce, 0x8c9341b7, 0xd0d854c0, 0xcb3a6c88,
0x47bc2829, 0x4725ba37, 0xa66ad22b, 0x7ad61f1e, 0x0c5cbafa, 0x4437f107, 0xb6e79962, 0x42d2d816,
0x0a961288, 0xe1a5c06e, 0x13749e67, 0x72fc081a, 0xb1d139f7, 0xf9583745, 0xcf19df58, 0xbec3f756,
0xc06eba30, 0x07211b24, 0x45c28829, 0xc95e317f, 0xbc8ec511, 0x38bc46e9, 0xc6e6fa14, 0xbae8584a,
0xad4ebc46, 0x468f508b, 0x7829435f, 0xf124183b, 0x821dba9f, 0xaff60ff4, 0xea2c4e6d, 0x16e39264,
0x92544a8b, 0x009b4fc3, 0xaba68ced, 0x9ac96f78, 0x06a5b79a, 0xb2856e6e, 0x1aec3ca9, 0xbe838688,
0x0e0804e9, 0x55f1be56, 0xe7e5363b, 0xb3a1f25d, 0xf7debb85, 0x61fe033c, 0x16746233, 0x3c034c28,
0xda6d0c74, 0x79aac56c, 0x3ce4e1ad, 0x51f0c802, 0x98f8f35a, 0x1626a49f, 0xeed82b29, 0x1d382fe3,
0x0c4fb99a, 0xbb325778, 0x3ec6d97b, 0x6e77a6a9, 0xcb658b5c, 0xd45230c7, 0x2bd1408b, 0x60c03eb7,
0xb9068d78, 0xa33754f4, 0xf430c87d, 0xc8a71302, 0xb96d8c32, 0xebd4e7be, 0xbe8b9d2d, 0x7979fb06,
0xe7225308, 0x8b75cf77, 0x11ef8da4, 0xe083c858, 0x8d6b786f, 0x5a6317a6, 0xfa5cf7a0, 0x5dda0033,
0xf28ebfb0, 0xf5b9c310, 0xa0eac280, 0x08b9767a, 0xa3d9d2b0, 0x79d34217, 0x021a718d, 0x9ac6336a,
0x2711fd60, 0x438050e3, 0x069908a8, 0x3d7fedc4, 0x826d2bef, 0x4eeb8476, 0x488dcf25, 0x36c9d566,
0x28e74e41, 0xc2610aca, 0x3d49a9cf, 0xbae3b9df, 0xb65f8de6, 0x92aeaf64, 0x3ac7d5e6, 0x9ea80509,
0xf22b017d, 0xa4173f70, 0xdd1e16c3, 0x15e0d7f9, 0x50b1b887, 0x2b9f4fd5, 0x625aba82, 0x6a017962,
0x2ec01b9c, 0x15488aa9, 0xd716e740, 0x40055a2c, 0x93d29a22, 0xe32dbf9a, 0x058745b9, 0x3453dc1e,
0xd699296e, 0x496cff6f, 0x1c9f4986, 0xdfe2ed07, 0xb87242d1, 0x19de7eae, 0x053e561a, 0x15ad6f8c,
0x66626c1c, 0x7154c24c, 0xea082b2a, 0x93eb2939, 0x17dcb0f0, 0x58d4f2ae, 0x9ea294fb, 0x52cf564c,
0x9883fe66, 0x2ec40581, 0x763953c3, 0x01d6692e, 0xd3a0c108, 0xa1e7160e, 0xe4f2dfa6, 0x693ed285,
0x74904698, 0x4c2b0edd, 0x4f757656, 0x5d393378, 0xa132234f, 0x3d321c5d, 0xc3f5e194, 0x4b269301,
0xc79f022f, 0x3c997e7e, 0x5e4f9504, 0x3ffafbbd, 0x76f7ad0e, 0x296693f4, 0x3d1fce6f, 0xc61e45be,
0xd3b5ab34, 0xf72bf9b7, 0x1b0434c0, 0x4e72b567, 0x5592a33d, 0xb5229301, 0xcfd2a87f, 0x60aeb767,
0x1814386b, 0x30bcc33d, 0x38a0c07d, 0xfd1606f2, 0xc363519b, 0x589dd390, 0x5479f8e6, 0x1cb8d647,
0x97fd61a9, 0xea7759f4, 0x2d57539d, 0x569a58cf, 0xe84e63ad, 0x462e1b78, 0x6580f87e, 0xf3817914,
0x91da55f4, 0x40a230f3, 0xd1988f35, 0xb6e318d2, 0x3ffa50bc, 0x3d40f021, 0xc3c0bdae, 0x4958c24c,
0x518f36b2, 0x84b1d370, 0x0fedce83, 0x878ddada, 0xf2a279c7, 0x94e01be8, 0x90716f4b, 0x954b8aa3
};
static const u32 s8[256] = {
0xe216300d, 0xbbddfffc, 0xa7ebdabd, 0x35648095, 0x7789f8b7, 0xe6c1121b, 0x0e241600, 0x052ce8b5,
0x11a9cfb0, 0xe5952f11, 0xece7990a, 0x9386d174, 0x2a42931c, 0x76e38111, 0xb12def3a, 0x37ddddfc,
0xde9adeb1, 0x0a0cc32c, 0xbe197029, 0x84a00940, 0xbb243a0f, 0xb4d137cf, 0xb44e79f0, 0x049eedfd,
0x0b15a15d, 0x480d3168, 0x8bbbde5a, 0x669ded42, 0xc7ece831, 0x3f8f95e7, 0x72df191b, 0x7580330d,
0x94074251, 0x5c7dcdfa, 0xabbe6d63, 0xaa402164, 0xb301d40a, 0x02e7d1ca, 0x53571dae, 0x7a3182a2,
0x12a8ddec, 0xfdaa335d, 0x176f43e8, 0x71fb46d4, 0x38129022, 0xce949ad4, 0xb84769ad, 0x965bd862,
0x82f3d055, 0x66fb9767, 0x15b80b4e, 0x1d5b47a0, 0x4cfde06f, 0xc28ec4b8, 0x57e8726e, 0x647a78fc,
0x99865d44, 0x608bd593, 0x6c200e03, 0x39dc5ff6, 0x5d0b00a3, 0xae63aff2, 0x7e8bd632, 0x70108c0c,
0xbbd35049, 0x2998df04, 0x980cf42a, 0x9b6df491, 0x9e7edd53, 0x06918548, 0x58cb7e07, 0x3b74ef2e,
0x522fffb1, 0xd24708cc, 0x1c7e27cd, 0xa4eb215b, 0x3cf1d2e2, 0x19b47a38, 0x424f7618, 0x35856039,
0x9d17dee7, 0x27eb35e6, 0xc9aff67b, 0x36baf5b8, 0x09c467cd, 0xc18910b1, 0xe11dbf7b, 0x06cd1af8,
0x7170c608, 0x2d5e3354, 0xd4de495a, 0x64c6d006, 0xbcc0c62c, 0x3dd00db3, 0x708f8f34, 0x77d51b42,
0x264f620f, 0x24b8d2bf, 0x15c1b79e, 0x46a52564, 0xf8d7e54e, 0x3e378160, 0x7895cda5, 0x859c15a5,
0xe6459788, 0xc37bc75f, 0xdb07ba0c, 0x0676a3ab, 0x7f229b1e, 0x31842e7b, 0x24259fd7, 0xf8bef472,
0x835ffcb8, 0x6df4c1f2, 0x96f5b195, 0xfd0af0fc, 0xb0fe134c, 0xe2506d3d, 0x4f9b12ea, 0xf215f225,
0xa223736f, 0x9fb4c428, 0x25d04979, 0x34c713f8, 0xc4618187, 0xea7a6e98, 0x7cd16efc, 0x1436876c,
0xf1544107, 0xbedeee14, 0x56e9af27, 0xa04aa441, 0x3cf7c899, 0x92ecbae6, 0xdd67016d, 0x151682eb,
0xa842eedf, 0xfdba60b4, 0xf1907b75, 0x20e3030f, 0x24d8c29e, 0xe139673b, 0xefa63fb8, 0x71873054,
0xb6f2cf3b, 0x9f326442, 0xcb15a4cc, 0xb01a4504, 0xf1e47d8d, 0x844a1be5, 0xbae7dfdc, 0x42cbda70,
0xcd7dae0a, 0x57e85b7a, 0xd53f5af6, 0x20cf4d8c, 0xcea4d428, 0x79d130a4, 0x3486ebfb, 0x33d3cddc,
0x77853b53, 0x37effcb5, 0xc5068778, 0xe580b3e6, 0x4e68b8f4, 0xc5c8b37e, 0x0d809ea2, 0x398feb7c,
0x132a4f94, 0x43b7950e, 0x2fee7d1c, 0x223613bd, 0xdd06caa2, 0x37df932b, 0xc4248289, 0xacf3ebc3,
0x5715f6b7, 0xef3478dd, 0xf267616f, 0xc148cbe4, 0x9052815e, 0x5e410fab, 0xb48a2465, 0x2eda7fa4,
0xe87b40e4, 0xe98ea084, 0x5889e9e1, 0xefd390fc, 0xdd07d35b, 0xdb485694, 0x38d7e5b2, 0x57720101,
0x730edebc, 0x5b643113, 0x94917e4f, 0x503c2fba, 0x646f1282, 0x7523d24a, 0xe0779695, 0xf9c17a8f,
0x7a5b2121, 0xd187b896, 0x29263a4d, 0xba510cdf, 0x81f47c9f, 0xad1163ed, 0xea7b5965, 0x1a00726e,
0x11403092, 0x00da6d77, 0x4a0cdd61, 0xad1f4603, 0x605bdfb0, 0x9eedc364, 0x22ebe6a8, 0xcee7d28a,
0xa0e736a0, 0x5564a6b9, 0x10853209, 0xc7eb8f37, 0x2de705ca, 0x8951570f, 0xdf09822b, 0xbd691a6c,
0xaa12e4f2, 0x87451c0f, 0xe0f6a27a, 0x3ada4819, 0x4cf1764f, 0x0d771c2b, 0x67cdb156, 0x350d8384,
0x5938fa0f, 0x42399ef3, 0x36997b07, 0x0e84093d, 0x4aa93e61, 0x8360d87b, 0x1fa98b0c, 0x1149382c,
0xe97625a5, 0x0614d1b7, 0x0e25244b, 0x0c768347, 0x589e8d82, 0x0d2059d1, 0xa466bb1e, 0xf8da0a82,
0x04f19130, 0xba6e4ec0, 0x99265164, 0x1ee7230d, 0x50b2ad80, 0xeaee6801, 0x8db2a283, 0xea8bf59e
};


#ifdef USE_AMD64_ASM

/* Assembly implementations of CAST5. */
extern void _gcry_cast5_amd64_encrypt_block(CAST5_context *c, byte *outbuf,
					    const byte *inbuf);

extern void _gcry_cast5_amd64_decrypt_block(CAST5_context *c, byte *outbuf,
					    const byte *inbuf);

/* These assembly implementations process four blocks in parallel. */
extern void _gcry_cast5_amd64_ctr_enc(CAST5_context *ctx, byte *out,
				      const byte *in, byte *ctr);

extern void _gcry_cast5_amd64_cbc_dec(CAST5_context *ctx, byte *out,
				      const byte *in, byte *iv);

extern void _gcry_cast5_amd64_cfb_dec(CAST5_context *ctx, byte *out,
				      const byte *in, byte *iv);

static void
do_encrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
{
  _gcry_cast5_amd64_encrypt_block (context, outbuf, inbuf);
}

static void
do_decrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
{
  _gcry_cast5_amd64_decrypt_block (context, outbuf, inbuf);
}

static void
cast5_amd64_ctr_enc(CAST5_context *ctx, byte *out, const byte *in, byte *ctr)
{
  _gcry_cast5_amd64_ctr_enc (ctx, out, in, ctr);
}

static void
cast5_amd64_cbc_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv)
{
  _gcry_cast5_amd64_cbc_dec (ctx, out, in, iv);
}

static void
cast5_amd64_cfb_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv)
{
  _gcry_cast5_amd64_cfb_dec (ctx, out, in, iv);
}

static unsigned int
encrypt_block (void *context , byte *outbuf, const byte *inbuf)
{
  CAST5_context *c = (CAST5_context *) context;
  do_encrypt_block (c, outbuf, inbuf);
  return /*burn_stack*/ (2*8);
}

static unsigned int
decrypt_block (void *context, byte *outbuf, const byte *inbuf)
{
  CAST5_context *c = (CAST5_context *) context;
  do_decrypt_block (c, outbuf, inbuf);
  return /*burn_stack*/ (2*8);
}

#elif defined(USE_ARM_ASM)

/* ARM assembly implementations of CAST5. */
extern void _gcry_cast5_arm_encrypt_block(CAST5_context *c, byte *outbuf,
					    const byte *inbuf);

extern void _gcry_cast5_arm_decrypt_block(CAST5_context *c, byte *outbuf,
					    const byte *inbuf);

/* These assembly implementations process two blocks in parallel. */
extern void _gcry_cast5_arm_ctr_enc(CAST5_context *ctx, byte *out,
				      const byte *in, byte *ctr);

extern void _gcry_cast5_arm_cbc_dec(CAST5_context *ctx, byte *out,
				      const byte *in, byte *iv);

extern void _gcry_cast5_arm_cfb_dec(CAST5_context *ctx, byte *out,
				      const byte *in, byte *iv);

static void
do_encrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
{
  _gcry_cast5_arm_encrypt_block (context, outbuf, inbuf);
}

static void
do_decrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
{
  _gcry_cast5_arm_decrypt_block (context, outbuf, inbuf);
}

static unsigned int
encrypt_block (void *context , byte *outbuf, const byte *inbuf)
{
  CAST5_context *c = (CAST5_context *) context;
  do_encrypt_block (c, outbuf, inbuf);
  return /*burn_stack*/ (10*4);
}

static unsigned int
decrypt_block (void *context, byte *outbuf, const byte *inbuf)
{
  CAST5_context *c = (CAST5_context *) context;
  do_decrypt_block (c, outbuf, inbuf);
  return /*burn_stack*/ (10*4);
}

#else /*USE_ARM_ASM*/

#define F1(D,m,r)  (  (I = ((m) + (D))), (I=rol(I,(r))),   \
    (((s1[I >> 24] ^ s2[(I>>16)&0xff]) - s3[(I>>8)&0xff]) + s4[I&0xff]) )
#define F2(D,m,r)  (  (I = ((m) ^ (D))), (I=rol(I,(r))),   \
    (((s1[I >> 24] - s2[(I>>16)&0xff]) + s3[(I>>8)&0xff]) ^ s4[I&0xff]) )
#define F3(D,m,r)  (  (I = ((m) - (D))), (I=rol(I,(r))),   \
    (((s1[I >> 24] + s2[(I>>16)&0xff]) ^ s3[(I>>8)&0xff]) - s4[I&0xff]) )

static void
do_encrypt_block( CAST5_context *c, byte *outbuf, const byte *inbuf )
{
    u32 l, r, t;
    u32 I;   /* used by the Fx macros */
    u32 *Km;
    u32 Kr;

    Km = c->Km;
    Kr = buf_get_le32(c->Kr + 0);

    /* (L0,R0) <-- (m1...m64).	(Split the plaintext into left and
     * right 32-bit halves L0 = m1...m32 and R0 = m33...m64.)
     */
    l = buf_get_be32(inbuf + 0);
    r = buf_get_be32(inbuf + 4);

    /* (16 rounds) for i from 1 to 16, compute Li and Ri as follows:
     *	Li = Ri-1;
     *	Ri = Li-1 ^ f(Ri-1,Kmi,Kri), where f is defined in Section 2.2
     * Rounds 1, 4, 7, 10, 13, and 16 use f function Type 1.
     * Rounds 2, 5, 8, 11, and 14 use f function Type 2.
     * Rounds 3, 6, 9, 12, and 15 use f function Type 3.
     */

    t = l; l = r; r = t ^ F1(r, Km[ 0], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F2(r, Km[ 1], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F3(r, Km[ 2], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F1(r, Km[ 3], Kr & 31); Kr = buf_get_le32(c->Kr + 4);
    t = l; l = r; r = t ^ F2(r, Km[ 4], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F3(r, Km[ 5], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F1(r, Km[ 6], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F2(r, Km[ 7], Kr & 31); Kr = buf_get_le32(c->Kr + 8);
    t = l; l = r; r = t ^ F3(r, Km[ 8], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F1(r, Km[ 9], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F2(r, Km[10], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F3(r, Km[11], Kr & 31); Kr = buf_get_le32(c->Kr + 12);
    t = l; l = r; r = t ^ F1(r, Km[12], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F2(r, Km[13], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F3(r, Km[14], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F1(r, Km[15], Kr & 31);

    /* c1...c64 <-- (R16,L16).	(Exchange final blocks L16, R16 and
     *	concatenate to form the ciphertext.) */
    buf_put_be32(outbuf + 0, r);
    buf_put_be32(outbuf + 4, l);
}

static unsigned int
encrypt_block (void *context , byte *outbuf, const byte *inbuf)
{
  CAST5_context *c = (CAST5_context *) context;
  do_encrypt_block (c, outbuf, inbuf);
  return /*burn_stack*/ (20+4*sizeof(void*));
}


static void
do_encrypt_block_3( CAST5_context *c, byte *outbuf, const byte *inbuf )
{
    u32 l0, r0, t0, l1, r1, t1, l2, r2, t2;
    u32 I;   /* used by the Fx macros */
    u32 *Km;
    u32 Kr;

    Km = c->Km;
    Kr = buf_get_le32(c->Kr + 0);

    l0 = buf_get_be32(inbuf + 0);
    r0 = buf_get_be32(inbuf + 4);
    l1 = buf_get_be32(inbuf + 8);
    r1 = buf_get_be32(inbuf + 12);
    l2 = buf_get_be32(inbuf + 16);
    r2 = buf_get_be32(inbuf + 20);

    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 0], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 0], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 0], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 1], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 1], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 1], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 2], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 2], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 2], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 3], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 3], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 3], Kr & 31);
    Kr = buf_get_le32(c->Kr + 4);
    t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 4], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 4], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 4], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 5], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 5], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 5], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 6], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 6], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 6], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 7], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 7], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 7], Kr & 31);
    Kr = buf_get_le32(c->Kr + 8);
    t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 8], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 8], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 8], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 9], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 9], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 9], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[10], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[10], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[10], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[11], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[11], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[11], Kr & 31);
    Kr = buf_get_le32(c->Kr + 12);
    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[12], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[12], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[12], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[13], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[13], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[13], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[14], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[14], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[14], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[15], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[15], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[15], Kr & 31);

    buf_put_be32(outbuf + 0, r0);
    buf_put_be32(outbuf + 4, l0);
    buf_put_be32(outbuf + 8, r1);
    buf_put_be32(outbuf + 12, l1);
    buf_put_be32(outbuf + 16, r2);
    buf_put_be32(outbuf + 20, l2);
}


static void
do_decrypt_block (CAST5_context *c, byte *outbuf, const byte *inbuf )
{
    u32 l, r, t;
    u32 I;
    u32 *Km;
    u32 Kr;

    Km = c->Km;
    Kr = buf_get_be32(c->Kr + 12);

    l = buf_get_be32(inbuf + 0);
    r = buf_get_be32(inbuf + 4);

    t = l; l = r; r = t ^ F1(r, Km[15], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F3(r, Km[14], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F2(r, Km[13], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F1(r, Km[12], Kr & 31); Kr = buf_get_be32(c->Kr + 8);
    t = l; l = r; r = t ^ F3(r, Km[11], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F2(r, Km[10], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F1(r, Km[ 9], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F3(r, Km[ 8], Kr & 31); Kr = buf_get_be32(c->Kr + 4);
    t = l; l = r; r = t ^ F2(r, Km[ 7], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F1(r, Km[ 6], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F3(r, Km[ 5], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F2(r, Km[ 4], Kr & 31); Kr = buf_get_be32(c->Kr + 0);
    t = l; l = r; r = t ^ F1(r, Km[ 3], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F3(r, Km[ 2], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F2(r, Km[ 1], Kr & 31); Kr >>= 8;
    t = l; l = r; r = t ^ F1(r, Km[ 0], Kr & 31);

    buf_put_be32(outbuf + 0, r);
    buf_put_be32(outbuf + 4, l);
}

static unsigned int
decrypt_block (void *context, byte *outbuf, const byte *inbuf)
{
  CAST5_context *c = (CAST5_context *) context;
  do_decrypt_block (c, outbuf, inbuf);
  return /*burn_stack*/ (20+4*sizeof(void*));
}


static void
do_decrypt_block_3 (CAST5_context *c, byte *outbuf, const byte *inbuf )
{
    u32 l0, r0, t0, l1, r1, t1, l2, r2, t2;
    u32 I;
    u32 *Km;
    u32 Kr;

    Km = c->Km;
    Kr = buf_get_be32(c->Kr + 12);

    l0 = buf_get_be32(inbuf + 0);
    r0 = buf_get_be32(inbuf + 4);
    l1 = buf_get_be32(inbuf + 8);
    r1 = buf_get_be32(inbuf + 12);
    l2 = buf_get_be32(inbuf + 16);
    r2 = buf_get_be32(inbuf + 20);

    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[15], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[15], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[15], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[14], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[14], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[14], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[13], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[13], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[13], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[12], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[12], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[12], Kr & 31);
    Kr = buf_get_be32(c->Kr + 8);
    t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[11], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[11], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[11], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[10], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[10], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[10], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 9], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 9], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 9], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 8], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 8], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 8], Kr & 31);
    Kr = buf_get_be32(c->Kr + 4);
    t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 7], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 7], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 7], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 6], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 6], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 6], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 5], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 5], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 5], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 4], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 4], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 4], Kr & 31);
    Kr = buf_get_be32(c->Kr + 0);
    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 3], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 3], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 3], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 2], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 2], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 2], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 1], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 1], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 1], Kr & 31);
    Kr >>= 8;
    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 0], Kr & 31);
	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 0], Kr & 31);
		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 0], Kr & 31);

    buf_put_be32(outbuf + 0, r0);
    buf_put_be32(outbuf + 4, l0);
    buf_put_be32(outbuf + 8, r1);
    buf_put_be32(outbuf + 12, l1);
    buf_put_be32(outbuf + 16, r2);
    buf_put_be32(outbuf + 20, l2);
}

#endif /*!USE_ARM_ASM*/


/* Bulk encryption of complete blocks in CTR mode.  This function is only
   intended for the bulk encryption feature of cipher.c.  CTR is expected to be
   of size CAST5_BLOCKSIZE. */
static void
_gcry_cast5_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
		    const void *inbuf_arg, size_t nblocks)
{
  CAST5_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  unsigned char tmpbuf[CAST5_BLOCKSIZE * 3];
  int burn_stack_depth = (20 + 4 * sizeof(void*)) + 4 * CAST5_BLOCKSIZE;

#ifdef USE_AMD64_ASM
  {
    if (nblocks >= 4)
      burn_stack_depth += 8 * sizeof(void*);

    /* Process data in 4 block chunks. */
    while (nblocks >= 4)
      {
        cast5_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);

        nblocks -= 4;
        outbuf += 4 * CAST5_BLOCKSIZE;
        inbuf  += 4 * CAST5_BLOCKSIZE;
      }

    /* Use generic code to handle smaller chunks... */
  }
#elif defined(USE_ARM_ASM)
  {
    /* Process data in 2 block chunks. */
    while (nblocks >= 2)
      {
        _gcry_cast5_arm_ctr_enc(ctx, outbuf, inbuf, ctr);

        nblocks -= 2;
        outbuf += 2 * CAST5_BLOCKSIZE;
        inbuf  += 2 * CAST5_BLOCKSIZE;
      }

    /* Use generic code to handle smaller chunks... */
  }
#endif

#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM)
  for ( ;nblocks >= 3; nblocks -= 3)
    {
      /* Prepare the counter blocks. */
      cipher_block_cpy (tmpbuf + 0, ctr, CAST5_BLOCKSIZE);
      cipher_block_cpy (tmpbuf + 8, ctr, CAST5_BLOCKSIZE);
      cipher_block_cpy (tmpbuf + 16, ctr, CAST5_BLOCKSIZE);
      cipher_block_add (tmpbuf + 8, 1, CAST5_BLOCKSIZE);
      cipher_block_add (tmpbuf + 16, 2, CAST5_BLOCKSIZE);
      cipher_block_add (ctr, 3, CAST5_BLOCKSIZE);
      /* Encrypt the counter. */
      do_encrypt_block_3(ctx, tmpbuf, tmpbuf);
      /* XOR the input with the encrypted counter and store in output.  */
      buf_xor(outbuf, tmpbuf, inbuf, CAST5_BLOCKSIZE * 3);
      outbuf += CAST5_BLOCKSIZE * 3;
      inbuf  += CAST5_BLOCKSIZE * 3;
    }
#endif

  for ( ;nblocks; nblocks-- )
    {
      /* Encrypt the counter. */
      do_encrypt_block(ctx, tmpbuf, ctr);
      /* XOR the input with the encrypted counter and store in output.  */
      cipher_block_xor(outbuf, tmpbuf, inbuf, CAST5_BLOCKSIZE);
      outbuf += CAST5_BLOCKSIZE;
      inbuf  += CAST5_BLOCKSIZE;
      /* Increment the counter.  */
      cipher_block_add (ctr, 1, CAST5_BLOCKSIZE);
    }

  wipememory(tmpbuf, sizeof(tmpbuf));
  _gcry_burn_stack(burn_stack_depth);
}


/* Bulk decryption of complete blocks in CBC mode.  This function is only
   intended for the bulk encryption feature of cipher.c. */
static void
_gcry_cast5_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
		    const void *inbuf_arg, size_t nblocks)
{
  CAST5_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  unsigned char savebuf[CAST5_BLOCKSIZE * 3];
  int burn_stack_depth = (20 + 4 * sizeof(void*)) + 4 * CAST5_BLOCKSIZE;

#ifdef USE_AMD64_ASM
  {
    if (nblocks >= 4)
      burn_stack_depth += 8 * sizeof(void*);

    /* Process data in 4 block chunks. */
    while (nblocks >= 4)
      {
        cast5_amd64_cbc_dec(ctx, outbuf, inbuf, iv);

        nblocks -= 4;
        outbuf += 4 * CAST5_BLOCKSIZE;
        inbuf  += 4 * CAST5_BLOCKSIZE;
      }

    /* Use generic code to handle smaller chunks... */
  }
#elif defined(USE_ARM_ASM)
  {
    /* Process data in 2 block chunks. */
    while (nblocks >= 2)
      {
        _gcry_cast5_arm_cbc_dec(ctx, outbuf, inbuf, iv);

        nblocks -= 2;
        outbuf += 2 * CAST5_BLOCKSIZE;
        inbuf  += 2 * CAST5_BLOCKSIZE;
      }

    /* Use generic code to handle smaller chunks... */
  }
#endif

#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM)
  for ( ;nblocks >= 3; nblocks -= 3)
    {
      /* INBUF is needed later and it may be identical to OUTBUF, so store
         the intermediate result to SAVEBUF.  */
      do_decrypt_block_3 (ctx, savebuf, inbuf);

      cipher_block_xor_1 (savebuf + 0, iv, CAST5_BLOCKSIZE);
      cipher_block_xor_1 (savebuf + 8, inbuf, CAST5_BLOCKSIZE * 2);
      cipher_block_cpy (iv, inbuf + 16, CAST5_BLOCKSIZE);
      buf_cpy (outbuf, savebuf, CAST5_BLOCKSIZE * 3);
      inbuf += CAST5_BLOCKSIZE * 3;
      outbuf += CAST5_BLOCKSIZE * 3;
    }
#endif

  for ( ;nblocks; nblocks-- )
    {
      /* INBUF is needed later and it may be identical to OUTBUF, so store
         the intermediate result to SAVEBUF.  */
      do_decrypt_block (ctx, savebuf, inbuf);

      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, CAST5_BLOCKSIZE);
      inbuf += CAST5_BLOCKSIZE;
      outbuf += CAST5_BLOCKSIZE;
    }

  wipememory(savebuf, sizeof(savebuf));
  _gcry_burn_stack(burn_stack_depth);
}

/* Bulk decryption of complete blocks in CFB mode.  This function is only
   intended for the bulk encryption feature of cipher.c. */
static void
_gcry_cast5_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
		    const void *inbuf_arg, size_t nblocks)
{
  CAST5_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  unsigned char tmpbuf[CAST5_BLOCKSIZE * 3];
  int burn_stack_depth = (20 + 4 * sizeof(void*)) + 4 * CAST5_BLOCKSIZE;

#ifdef USE_AMD64_ASM
  {
    if (nblocks >= 4)
      burn_stack_depth += 8 * sizeof(void*);

    /* Process data in 4 block chunks. */
    while (nblocks >= 4)
      {
        cast5_amd64_cfb_dec(ctx, outbuf, inbuf, iv);

        nblocks -= 4;
        outbuf += 4 * CAST5_BLOCKSIZE;
        inbuf  += 4 * CAST5_BLOCKSIZE;
      }

    /* Use generic code to handle smaller chunks... */
  }
#elif defined(USE_ARM_ASM)
  {
    /* Process data in 2 block chunks. */
    while (nblocks >= 2)
      {
        _gcry_cast5_arm_cfb_dec(ctx, outbuf, inbuf, iv);

        nblocks -= 2;
        outbuf += 2 * CAST5_BLOCKSIZE;
        inbuf  += 2 * CAST5_BLOCKSIZE;
      }

    /* Use generic code to handle smaller chunks... */
  }
#endif

#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM)
  for ( ;nblocks >= 3; nblocks -= 3 )
    {
      cipher_block_cpy (tmpbuf + 0, iv, CAST5_BLOCKSIZE);
      cipher_block_cpy (tmpbuf + 8, inbuf + 0, CAST5_BLOCKSIZE * 2);
      cipher_block_cpy (iv, inbuf + 16, CAST5_BLOCKSIZE);
      do_encrypt_block_3 (ctx, tmpbuf, tmpbuf);
      buf_xor (outbuf, inbuf, tmpbuf, CAST5_BLOCKSIZE * 3);
      outbuf += CAST5_BLOCKSIZE * 3;
      inbuf  += CAST5_BLOCKSIZE * 3;
    }
#endif

  for ( ;nblocks; nblocks-- )
    {
      do_encrypt_block(ctx, iv, iv);
      cipher_block_xor_n_copy(outbuf, iv, inbuf, CAST5_BLOCKSIZE);
      outbuf += CAST5_BLOCKSIZE;
      inbuf  += CAST5_BLOCKSIZE;
    }

  wipememory(tmpbuf, sizeof(tmpbuf));
  _gcry_burn_stack(burn_stack_depth);
}


/* Run the self-tests for CAST5-CTR, tests IV increment of bulk CTR
   encryption.  Returns NULL on success. */
static const char *
selftest_ctr (void)
{
  const int nblocks = 4+1;
  const int blocksize = CAST5_BLOCKSIZE;
  const int context_size = sizeof(CAST5_context);

  return _gcry_selftest_helper_ctr("CAST5", &cast_setkey,
           &encrypt_block, nblocks, blocksize, context_size);
}


/* Run the self-tests for CAST5-CBC, tests bulk CBC decryption.
   Returns NULL on success. */
static const char *
selftest_cbc (void)
{
  const int nblocks = 4+2;
  const int blocksize = CAST5_BLOCKSIZE;
  const int context_size = sizeof(CAST5_context);

  return _gcry_selftest_helper_cbc("CAST5", &cast_setkey,
           &encrypt_block, nblocks, blocksize, context_size);
}


/* Run the self-tests for CAST5-CFB, tests bulk CBC decryption.
   Returns NULL on success. */
static const char *
selftest_cfb (void)
{
  const int nblocks = 4+2;
  const int blocksize = CAST5_BLOCKSIZE;
  const int context_size = sizeof(CAST5_context);

  return _gcry_selftest_helper_cfb("CAST5", &cast_setkey,
           &encrypt_block, nblocks, blocksize, context_size);
}


static const char*
selftest(void)
{
    CAST5_context c;
    cipher_bulk_ops_t bulk_ops;
    static const byte key[16] =
                    { 0x01, 0x23, 0x45, 0x67, 0x12, 0x34, 0x56, 0x78,
		      0x23, 0x45, 0x67, 0x89, 0x34, 0x56, 0x78, 0x9A  };
    static const byte plain[8] =
                    { 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF };
    static const byte cipher[8] =
                    { 0x23, 0x8B, 0x4F, 0xE5, 0x84, 0x7E, 0x44, 0xB2 };
    byte buffer[8];
    const char *r;

    cast_setkey( &c, key, 16, &bulk_ops );
    encrypt_block( &c, buffer, plain );
    if( memcmp( buffer, cipher, 8 ) )
	return "1";
    decrypt_block( &c, buffer, buffer );
    if( memcmp( buffer, plain, 8 ) )
	return "2";

#if 0 /* full maintenance test */
    {
	int i;
	byte a0[16] = { 0x01,0x23,0x45,0x67,0x12,0x34,0x56,0x78,
			0x23,0x45,0x67,0x89,0x34,0x56,0x78,0x9A };
	byte b0[16] = { 0x01,0x23,0x45,0x67,0x12,0x34,0x56,0x78,
			0x23,0x45,0x67,0x89,0x34,0x56,0x78,0x9A };
	byte a1[16] = { 0xEE,0xA9,0xD0,0xA2,0x49,0xFD,0x3B,0xA6,
			0xB3,0x43,0x6F,0xB8,0x9D,0x6D,0xCA,0x92 };
	byte b1[16] = { 0xB2,0xC9,0x5E,0xB0,0x0C,0x31,0xAD,0x71,
			0x80,0xAC,0x05,0xB8,0xE8,0x3D,0x69,0x6E };

	for(i=0; i < 1000000; i++ ) {
	    cast_setkey( &c, b0, 16, &bulk_ops );
	    encrypt_block( &c, a0, a0 );
	    encrypt_block( &c, a0+8, a0+8 );
	    cast_setkey( &c, a0, 16, &bulk_ops );
	    encrypt_block( &c, b0, b0 );
	    encrypt_block( &c, b0+8, b0+8 );
	}
	if( memcmp( a0, a1, 16 ) || memcmp( b0, b1, 16 ) )
	    return "3";

    }
#endif

    if ( (r = selftest_cbc ()) )
      return r;

    if ( (r = selftest_cfb ()) )
      return r;

    if ( (r = selftest_ctr ()) )
      return r;

    return NULL;
}


static void
key_schedule( u32 *x, u32 *z, u32 *k )
{

#define xi(i)   ((x[(i)/4] >> (8*(3-((i)%4)))) & 0xff)
#define zi(i)   ((z[(i)/4] >> (8*(3-((i)%4)))) & 0xff)

    z[0] = x[0] ^ s5[xi(13)]^s6[xi(15)]^s7[xi(12)]^s8[xi(14)]^s7[xi( 8)];
    z[1] = x[2] ^ s5[zi( 0)]^s6[zi( 2)]^s7[zi( 1)]^s8[zi( 3)]^s8[xi(10)];
    z[2] = x[3] ^ s5[zi( 7)]^s6[zi( 6)]^s7[zi( 5)]^s8[zi( 4)]^s5[xi( 9)];
    z[3] = x[1] ^ s5[zi(10)]^s6[zi( 9)]^s7[zi(11)]^s8[zi( 8)]^s6[xi(11)];
    k[0] = s5[zi( 8)]^s6[zi( 9)]^s7[zi( 7)]^s8[zi( 6)]^s5[zi( 2)];
    k[1] = s5[zi(10)]^s6[zi(11)]^s7[zi( 5)]^s8[zi( 4)]^s6[zi( 6)];
    k[2] = s5[zi(12)]^s6[zi(13)]^s7[zi( 3)]^s8[zi( 2)]^s7[zi( 9)];
    k[3] = s5[zi(14)]^s6[zi(15)]^s7[zi( 1)]^s8[zi( 0)]^s8[zi(12)];

    x[0] = z[2] ^ s5[zi( 5)]^s6[zi( 7)]^s7[zi( 4)]^s8[zi( 6)]^s7[zi( 0)];
    x[1] = z[0] ^ s5[xi( 0)]^s6[xi( 2)]^s7[xi( 1)]^s8[xi( 3)]^s8[zi( 2)];
    x[2] = z[1] ^ s5[xi( 7)]^s6[xi( 6)]^s7[xi( 5)]^s8[xi( 4)]^s5[zi( 1)];
    x[3] = z[3] ^ s5[xi(10)]^s6[xi( 9)]^s7[xi(11)]^s8[xi( 8)]^s6[zi( 3)];
    k[4] = s5[xi( 3)]^s6[xi( 2)]^s7[xi(12)]^s8[xi(13)]^s5[xi( 8)];
    k[5] = s5[xi( 1)]^s6[xi( 0)]^s7[xi(14)]^s8[xi(15)]^s6[xi(13)];
    k[6] = s5[xi( 7)]^s6[xi( 6)]^s7[xi( 8)]^s8[xi( 9)]^s7[xi( 3)];
    k[7] = s5[xi( 5)]^s6[xi( 4)]^s7[xi(10)]^s8[xi(11)]^s8[xi( 7)];

    z[0] = x[0] ^ s5[xi(13)]^s6[xi(15)]^s7[xi(12)]^s8[xi(14)]^s7[xi( 8)];
    z[1] = x[2] ^ s5[zi( 0)]^s6[zi( 2)]^s7[zi( 1)]^s8[zi( 3)]^s8[xi(10)];
    z[2] = x[3] ^ s5[zi( 7)]^s6[zi( 6)]^s7[zi( 5)]^s8[zi( 4)]^s5[xi( 9)];
    z[3] = x[1] ^ s5[zi(10)]^s6[zi( 9)]^s7[zi(11)]^s8[zi( 8)]^s6[xi(11)];
    k[8] = s5[zi( 3)]^s6[zi( 2)]^s7[zi(12)]^s8[zi(13)]^s5[zi( 9)];
    k[9] = s5[zi( 1)]^s6[zi( 0)]^s7[zi(14)]^s8[zi(15)]^s6[zi(12)];
    k[10]= s5[zi( 7)]^s6[zi( 6)]^s7[zi( 8)]^s8[zi( 9)]^s7[zi( 2)];
    k[11]= s5[zi( 5)]^s6[zi( 4)]^s7[zi(10)]^s8[zi(11)]^s8[zi( 6)];

    x[0] = z[2] ^ s5[zi( 5)]^s6[zi( 7)]^s7[zi( 4)]^s8[zi( 6)]^s7[zi( 0)];
    x[1] = z[0] ^ s5[xi( 0)]^s6[xi( 2)]^s7[xi( 1)]^s8[xi( 3)]^s8[zi( 2)];
    x[2] = z[1] ^ s5[xi( 7)]^s6[xi( 6)]^s7[xi( 5)]^s8[xi( 4)]^s5[zi( 1)];
    x[3] = z[3] ^ s5[xi(10)]^s6[xi( 9)]^s7[xi(11)]^s8[xi( 8)]^s6[zi( 3)];
    k[12]= s5[xi( 8)]^s6[xi( 9)]^s7[xi( 7)]^s8[xi( 6)]^s5[xi( 3)];
    k[13]= s5[xi(10)]^s6[xi(11)]^s7[xi( 5)]^s8[xi( 4)]^s6[xi( 7)];
    k[14]= s5[xi(12)]^s6[xi(13)]^s7[xi( 3)]^s8[xi( 2)]^s7[xi( 8)];
    k[15]= s5[xi(14)]^s6[xi(15)]^s7[xi( 1)]^s8[xi( 0)]^s8[xi(13)];

#undef xi
#undef zi
}


static gcry_err_code_t
do_cast_setkey( CAST5_context *c, const byte *key, unsigned keylen )
{
  static int initialized;
  static const char* selftest_failed;
  int i;
  u32 x[4];
  u32 z[4];
  u32 k[16];

  if( !initialized )
    {
      initialized = 1;
      selftest_failed = selftest();
      if( selftest_failed )
        log_error ("CAST5 selftest failed (%s).\n", selftest_failed );
    }
  if( selftest_failed )
    return GPG_ERR_SELFTEST_FAILED;

  if( keylen != 16 )
    return GPG_ERR_INV_KEYLEN;

  x[0] = buf_get_be32(key + 0);
  x[1] = buf_get_be32(key + 4);
  x[2] = buf_get_be32(key + 8);
  x[3] = buf_get_be32(key + 12);

  key_schedule( x, z, k );
  for(i=0; i < 16; i++ )
    c->Km[i] = k[i];
  key_schedule( x, z, k );
  for(i=0; i < 16; i++ )
    c->Kr[i] = k[i] & 0x1f;

#ifdef USE_ARM_ASM
  for (i = 0; i < 4; i++)
    {
      byte Kr_arm[4];

      /* Convert rotate left to rotate right and add shift left
       * by 2.  */
      Kr_arm[0] = ((32 - c->Kr[4 * i + 0]) - 2) & 0x1f;
      Kr_arm[1] = ((32 - c->Kr[4 * i + 1]) - 2) & 0x1f;
      Kr_arm[2] = ((32 - c->Kr[4 * i + 2]) - 2) & 0x1f;
      Kr_arm[3] = ((32 - c->Kr[4 * i + 3]) - 2) & 0x1f;

      /* Endian friendly store.  */
      c->Kr_arm_enc[i] = Kr_arm[0] |
                        (Kr_arm[1] << 8) |
                        (Kr_arm[2] << 16) |
                        (Kr_arm[3] << 24);
      c->Kr_arm_dec[i] = Kr_arm[3] |
                        (Kr_arm[2] << 8) |
                        (Kr_arm[1] << 16) |
                        (Kr_arm[0] << 24);

      wipememory(Kr_arm, sizeof(Kr_arm));
    }
#endif

  wipememory(x, sizeof x);
  wipememory(z, sizeof z);
  wipememory(k, sizeof k);

#undef xi
#undef zi
  return GPG_ERR_NO_ERROR;
}

static gcry_err_code_t
cast_setkey (void *context, const byte *key, unsigned keylen,
             cipher_bulk_ops_t *bulk_ops)
{
  CAST5_context *c = (CAST5_context *) context;
  gcry_err_code_t rc = do_cast_setkey (c, key, keylen);

  /* Setup bulk encryption routines.  */
  memset (bulk_ops, 0, sizeof(*bulk_ops));
  bulk_ops->cfb_dec = _gcry_cast5_cfb_dec;
  bulk_ops->cbc_dec = _gcry_cast5_cbc_dec;
  bulk_ops->ctr_enc = _gcry_cast5_ctr_enc;

  return rc;
}


gcry_cipher_spec_t _gcry_cipher_spec_cast5 =
  {
    GCRY_CIPHER_CAST5, {0, 0},
    "CAST5", NULL, NULL, CAST5_BLOCKSIZE, 128, sizeof (CAST5_context),
    cast_setkey, encrypt_block, decrypt_block
  };
